####################################
# Code to create plots for:        #
#                                  #
# "From Economic Competition to    #
# Military Combat: Export          #
# Similarity and International     #
# Conflict" (appendix)             #
#                                  #
# J. Tyson Chatagnier and Kerim    #
# Can Kavakli                      #
#                                  #
# Published in the Journal of      #
# Conflict Resolution              #
####################################

library(foreign)
library(reshape)
library(ggplot2)
library(gridExtra)

rm(list = ls(all = TRUE))

#######################
# Define function to  #
# calculate predicted #
# values.             #
#######################

GetPreds <- function(xmat,
                     coefs,
                     covmat,
                     var.change,
                     interaction = NULL,
                     change.lo   = NULL,
                     change.hi   = NULL,
                     change.vec  = NULL,
                     var.sq      = NULL,
                     app.fn      = "median",
                     t.diff      = NULL,
                     tval        = 1.96,
                     n.out       = 1000,
                     ci          = FALSE
                     ) {
                        # xmat is a matrix of x variables
                        # Note that xmat should include a constant.
                        # coefs is a vector of coefficient values
                        # covmat is the covariance matrix
                        # var.change is a number specifying which X
                        # is the variable of interest. This should
                        # be a number greater than 1, since a 1 would
                        # indicate the intercept.
                        # Interaction is a vector of length 2, with
                        # a first element that gives the column
                        # representing the variable with which
                        # var.change is to be interacted, and the
                        # second giving the column in which the
                        # interaction should be stored.
                        # var.sq is used if we want to include a 
                        # squared term. Just tell getPreds() which
                        # column that should be.
    k <- ncol(xmat)
    b <- coefs
    if (is.null(change.vec)) {
        change.lo <- ifelse(is.null(change.lo),
                            min(xmat[, var.change]),
                            change.lo
        )
        change.hi <- ifelse(is.null(change.hi),
                            max(xmat[, var.change]),
                            change.hi
        )
        change.vec <- seq(from       = change.lo,
                          to         = change.hi,
                          length.out = n.out
        )
    }
    if (is.numeric(app.fn)) {
        xc <- app.fn
    } else {
        x.nona <- na.omit(xmat)
        xc <- apply(x.nona,
                    2,
                    app.fn
        )
                        # Make sure there are no NAs when we apply the
                        # function to the matrix.
    }
    if (is.numeric(t.diff)) {
                        # t.diff is used to change the time
                        # easily, since we might care about
                        # particular values. It is a vector
                        # with two elements: the first element
                        # is the location of the original time
                        # variable, and the second is its
                        # desired value.
        t.vals <- c(t.diff[2],
                    t.diff[2] ^ 2,
                    t.diff[2] ^ 3
                    )
        xc[t.diff[1] : (t.diff[1] + 2)] <- t.vals
    }
    xc.mat <- matrix(rep(xc,
                         each = n.out
                     ),
                     ncol = k,
                     nrow = n.out
    )
                        # Create a matrix of constant values.
    xc.mat[, var.change] <- change.vec
                        # Vary the variable that needs varying.
    if (is.numeric(var.sq)) {
      xc.mat[, var.sq] <- change.vec ^ 2
    }
    if (length(interaction) == 2) {
      xc.mat[, interaction[2]] <- change.vec * xc[interaction[1]]
    }
    if (ncol(xc.mat) != nrow(as.matrix(b))) {
      b <- t(as.matrix(b))
    }
    xb <- xc.mat %*% b
    preds <- plogis(xb)
    if (ci) {
        deriv <- dlogis(xb)
                        # dlogis() is the derivative of plogis()
        deriv.mat <- sweep(xc.mat,
                           1,
                           deriv,
                           '*'
                     )
        var.pp <- deriv.mat %*% covmat %*% t(deriv.mat)
        se.pp <- sqrt(diag(var.pp))
        ci.lo <- preds - (tval * se.pp)
        ci.lo <- ifelse(ci.lo < 0,
                        0,
                        ifelse(ci.lo > 1,
                               1,
                               ci.lo)
                        )
        ci.hi <- preds + (tval * se.pp)
        ci.hi <- ifelse(ci.hi < 0,
                        0,
                        ifelse(ci.hi > 1,
                               1,
                               ci.hi)
                        )
        return(list(pred = preds,
                    change = change.vec,
                    lo = ci.lo,
                    hi = ci.hi
                    )
        )
    } else {
        return(list(pred = preds,
                    change = change.vec
                    )
        )
    }
}


###########################
# Read in data and create #
# necessary variables     #
###########################

sim.data <- read.dta("export_similarity_data.dta")

sim.data$dyadid <- sim.data$ccode1 + 0.001 * sim.data$ccode2
                        # Create a unique dyad identifier

##############################
# Create lagged versions     #
# of some variables, as well #
# as dummies for decades.    #
##############################

library(DataCombine)

sim.data$l.caprat <- slide(data.frame(var    = sim.data$pwin_cap,
                                      dyadid = sim.data$dyadid,
                                      year   = sim.data$year
                                      ),
                           Var      = "var",
                           GroupVar = "dyadid",
                           slideBy  = -1
                           )[, 4]
                        # Use the slide() function from the DataCombine
                        # package to lag by one unit. The fourth column
                        # of the resulting matrix is the lagged vector.

sim.data$l.caprat2 <- sim.data$l.caprat ^ 2

sim.data$l.allied <- slide(data.frame(var    = sim.data$allied,
                                     dyadid = sim.data$dyadid,
                                     year   = sim.data$year
                                     ),
                           Var      = "var",
                           GroupVar = "dyadid",
                           slideBy  = -1
                           )[, 4]

sim.data$l.lodem <- slide(data.frame(var     = sim.data$smldem,
                                     dyadid = sim.data$dyadid,
                                     year   = sim.data$year
                                     ),
                          Var      = "var",
                          GroupVar = "dyadid",
                          slideBy  = -1
                          )[, 4]

sim.data$l.s <- slide(data.frame(var    = sim.data$s3un,
                                 dyadid = sim.data$dyadid,
                                 year   = sim.data$year
                                 ),
                      Var      = "var",
                      GroupVar = "dyadid",
                      slideBy  = -1
                      )[, 4]


sim.data$sixties <- ifelse(sim.data$year < 1970,
                           1,
                           0
                           )

sim.data$seventies <- ifelse(sim.data$year >= 1970 & sim.data$year < 1980,
                             1,
                             0
                             )

sim.data$eighties <- ifelse(sim.data$year >= 1980 & sim.data$year < 1990,
                            1,
                            0
                            )

sim.data$nineties <- ifelse(sim.data$year >= 1990,
                            1,
                            0
                            )
                        # We are combining 2000 into the 90s.

sim.data$l.sim <- slide(data.frame(var    = sim.data$exports,
                                   dyadid = sim.data$dyadid,
                                   year   = sim.data$year
                                   ),
                        Var      = "var",
                        GroupVar = "dyadid",
                        slideBy  = -1
                        )[, 4]

sim.data$l.dep <- slide(data.frame(var    = sim.data$lowerdep2015,
                                   dyadid = sim.data$dyadid,
                                   year   = sim.data$year
                                   ),
                        Var      = "var",
                        GroupVar = "dyadid",
                        slideBy  = -1
                        )[, 4]

sim.data$l.riv <- slide(data.frame(var    = sim.data$rivalry,
                                   dyadid = sim.data$dyadid,
                                   year   = sim.data$year
                                   ),
                        Var      = "var",
                        GroupVar = "dyadid",
                        slideBy  = -1
                        )[, 4]

sim.data$l.jointdem <- slide(data.frame(var    = sim.data$jointdem,
                                        dyadid = sim.data$dyadid,
                                        year   = sim.data$year
                                        ),
                             Var      = "var",
                             GroupVar = "dyadid",
                             slideBy  = -1
                             )[, 4]

sim.data$l.rawexp <- slide(data.frame(var    = sim.data$rawexports,
                                      dyadid = sim.data$dyadid,
                                      year   = sim.data$year
                                      ),
                           Var      = "var",
                           GroupVar = "dyadid",
                           slideBy  = -1
                           )[, 4]

sim.data$l.raw_dep <- slide(data.frame(var    = sim.data$lower_rawdep2015,
                                       dyadid = sim.data$dyadid,
                                       year   = sim.data$year
                                       ),
                            Var      = "var",
                            GroupVar = "dyadid",
                            slideBy  = -1
                            )[, 4]

sim.data$l.manexp <- slide(data.frame(var    = sim.data$manexports,
                                      dyadid = sim.data$dyadid,
                                      year   = sim.data$year
                                      ),
                           Var      = "var",
                           GroupVar = "dyadid",
                           slideBy  = -1
                           )[, 4]

sim.data$l.man_dep <- slide(data.frame(var    = sim.data$lower_mandep2015,
                                       dyadid = sim.data$dyadid,
                                       year   = sim.data$year
                                       ),
                            Var      = "var",
                            GroupVar = "dyadid",
                            slideBy  = -1
                            )[, 4]

############
# Figure 1 #
############

sim.data <- sim.data[order(sim.data$year), ]

us.rus <- which(sim.data$dyadid == 2.365)
us.chin <- which(sim.data$dyadid == 2.710)
us.can <- which(sim.data$dyadid == 2.020)

iraq.iran <- which(sim.data$dyadid == 630.645)
iraq.is <- which(sim.data$dyadid == 645.666)
is.turk <- which(sim.data$dyadid == 640.666)

braz.chile <- which(sim.data$dyadid == 140.160)
braz.peru <- which(sim.data$dyadid == 135.140)
braz.mex <- which(sim.data$dyadid == 70.140)

df.all <- data.frame(year = sim.data$year[c(us.rus,
                                            us.chin,
                                            us.can,
                                            iraq.iran,
                                            iraq.is,
                                            is.turk,
                                            braz.chile,
                                            braz.peru,
                                            braz.mex
                                            )
                                          ],
                     sim = sim.data$exports[c(us.rus,
                                              us.chin,
                                              us.can,
                                              iraq.iran,
                                              iraq.is,
                                              is.turk,
                                              braz.chile,
                                              braz.peru,
                                              braz.mex
                                              )
                                            ],
                     id = sim.data$dyadid[c(us.rus,
                                            us.chin,
                                            us.can,
                                            iraq.iran,
                                            iraq.is,
                                            is.turk,                                        
                                            braz.chile,
                                            braz.peru,
                                            braz.mex
                                            )
                                          ]
                     )


plot.ur <- ggplot(data = df.all[df.all$id == 2.365, ],
                  aes(x = year,
                      y = sim
                      )
                  ) +
             geom_line(size = 1) +
             ylim(-.1, 1) +
             xlab("") +
             ylab("") +
             ggtitle("U.S.-Russia") + 
             theme_bw() +
             theme(plot.title = element_text(size = rel(0.9))) +
             theme(axis.text = element_text(size = 8))

plot.uch <- ggplot(data = df.all[df.all$id == 2.710, ],
                   aes(x = year,
                       y = sim
                       )
                   ) +
              geom_line(size = 1) +
              ylim(-.1, 1) +
              xlab("") +
              ylab("") +
              ggtitle("U.S.-China") + 
              theme_bw() + 
              theme(plot.title = element_text(size = rel(0.9))) +
              theme(axis.text = element_text(size = 8))

plot.uca <- ggplot(data = df.all[df.all$id == 2.020, ],
                   aes(x = year,
                       y = sim
                       )
                   ) +
              geom_line(size = 1) +
              ylim(-.1, 1) +
              xlab("") +
              ylab("") +
              ggtitle("U.S.-Canada") + 
              theme_bw() + 
              theme(plot.title = element_text(size = rel(0.9))) +
              theme(axis.text = element_text(size = 8))

plot.ii <- ggplot(data = df.all[df.all$id == 630.645, ],
                  aes(x = year,
                      y = sim
                      )
                  ) +
             geom_line(size = 1) +
             ylim(-.1, 1) +
             xlab("") +
             ylab("") +
             ggtitle("Iraq-Iran") + 
             theme_bw() + 
             theme(plot.title = element_text(size = rel(0.9))) +
             theme(axis.text = element_text(size = 8))

plot.iis <- ggplot(data = df.all[df.all$id == 645.666, ],
                   aes(x = year,
                       y = sim
                       )
                   ) +
              geom_line(size = 1) +
              ylim(-.1, 1) +
              xlab("") +
              ylab("") +
              ggtitle("Iraq-Israel") + 
              theme_bw() + 
              theme(plot.title = element_text(size = rel(0.9))) +
              theme(axis.text = element_text(size = 8))

plot.it <- ggplot(data = df.all[df.all$id == 640.666, ],
                  aes(x = year,
                      y = sim
                      )
                  ) +
             geom_line(size = 1) +
             ylim(-.1, 1) +
             xlab("") +
             ylab("") +
             ggtitle("Israel-Turkey") + 
             theme_bw() + 
             theme(plot.title = element_text(size = rel(0.9))) +
             theme(axis.text = element_text(size = 8))

plot.bm <- ggplot(data = df.all[df.all$id == 70.140, ],
                  aes(x = year,
                      y = sim
                      )
                  ) +
             geom_line(size = 1) +
             ylim(-.1, 1) +
             xlab("") +
             ylab("") +
             ggtitle("Brazil-Mexico") + 
             theme_bw() + 
             theme(plot.title = element_text(size = rel(0.9))) +
             theme(axis.text = element_text(size = 8))

plot.bc <- ggplot(data = df.all[df.all$id == 140.160, ],
                  aes(x = year,
                      y = sim
                      )
                  ) +
             geom_line(size = 1) +
             ylim(-.1, 1) +
             xlab("") +
             ylab("") +
             ggtitle("Brazil-Chile") + 
             theme_bw() + 
             theme(plot.title = element_text(size = rel(0.9))) +
             theme(axis.text = element_text(size = 8))


plot.bp <- ggplot(data = df.all[df.all$id == 135.140, ],
                  aes(x = year,
                      y = sim
                      )
                  ) +
             geom_line(size = 1) +
             ylim(-.1, 1) +
             xlab("") +
             ylab("") +
             ggtitle("Brazil-Peru") + 
             theme_bw() + 
             theme(plot.title = element_text(size = rel(0.9))) +
             theme(axis.text = element_text(size = 8))

grid.arrange(arrangeGrob(plot.ur,
                         plot.uch,
                         plot.uca,
                         plot.ii,
                         plot.iis,
                         plot.it,
                         plot.bm,
                         plot.bc,
                         plot.bp,
                         nrow   = 3,
                         left   = "\nExport Similarity",
                         bottom = "Year\n"
                         )
             )

############
# Figure 2 #
############

b <- as.numeric(read.table("beta_maingraph.txt"))

vcov <- as.matrix(read.table("vcov_maingraph.txt"))
                        # Estimation is performed in Stata. Results
                        # are output to .txt files, which can be 
                        # read into R.

rel.data <- data.frame(py        = sim.data$mid_py,
                       py2       = sim.data$mid_py2,
                       py3       = sim.data$mid_py3,
                       contig    = sim.data$contig_dummy,
                       lndist    = sim.data$lndist,
                       caprat    = sim.data$l.caprat,
                       jointdem  = sim.data$l.jointdem,
                       l.s       = sim.data$l.s,
                       majmaj    = sim.data$majmaj,
                       minmaj    = sim.data$majmin,
                       rivals    = sim.data$l.riv,
                       europe    = sim.data$both_europe,
                       sixties   = sim.data$sixties,
                       seventies = sim.data$seventies,
                       eighties  = sim.data$eighties,
                       trade     = sim.data$l.dep,
                       sim       = sim.data$l.sim,
                       constant  = 1
                       )

colnames(rel.data) <- c("t",
                        "t^2",
                        "t^3",
                        "contig",
                        "lndist",
                        "initcapshare",
                        "jointdem",
                        "s",
                        "majmaj",
                        "minmaj",
                        "rivals",
                        "europe",
                        "sixties",
                        "seventies",
                        "eighties",
                        "dependence",
                        "similarity",
                        "constant"
                        )


ip <- which(sim.data$ccode1 == 750 & 
            sim.data$ccode2 == 770 & 
            sim.data$year == 1971
            )

ik <- which(sim.data$ccode1 == 645 & 
            sim.data$ccode2 == 690 & 
            sim.data$year == 1988
            )
                        # Get values for India-Pakistan 1971 and 
                        # Iraq-Kuwait 1988, respectively.

####################################
# Left panel: India-Pakistan, 1971 #
####################################

val.vec <- as.numeric(rel.data[ip, ])

sim.vals <- GetPreds(xmat           = rel.data,
                     coefs          = b,
                     covmat         = vcov,
                     var.change     = 17,
                     interaction    = NULL,
                     change.lo      = min(rel.data$similarity, 
                                          na.rm = TRUE
                                          ),
                     change.hi      = max(rel.data$similarity, 
                                          na.rm = TRUE
                                          ),
                     change.vec     = NULL,
                     var.sq         = NULL,
                     app.fn         = val.vec,
                     tval           = 1.96,
                     n.out          = 1000,
                     ci             = TRUE
                     )

sim.df <- data.frame(change     = sim.vals$change,
                     pred       = sim.vals$pred,
                     lo         = sim.vals$lo,
                     hi         = sim.vals$hi
                     )

min.dist <- which(abs(sim.df$change - val.vec[17]) == min(abs(sim.df$change - val.vec[17])))

ggplot(data = sim.df,
       aes(x = change,
           y = pred
           )
       ) +
       geom_line(size   = 1.25,
                 colour = "darkblue"
                 ) +
       geom_ribbon(data = sim.df,
                   aes(ymin = lo,
                       ymax = hi
                       ),
                   fill   = "darkblue",
                   alpha  = 0.3,
                   colour = NA
                   ) +
       geom_point(aes(x      = val.vec[17],
                      y      = sim.df$pred[min.dist]
                      ),
                  colour = "red",
                  size   = 3.5
                  ) +
       ggtitle("India and Pakistan, 1971") +
       xlab("Portfolio Similarity") +
       ylab("Probability of Conflict") +
       theme_bw()

##################################
# Right panel: Iraq-Kuwait, 1988 #
##################################

val.vec <- as.numeric(rel.data[ik, ])

sim.vals <- GetPreds(xmat           = rel.data,
                     coefs          = b,
                     covmat         = vcov,
                     var.change     = 17,
                     interaction    = NULL,
                     change.lo      = min(rel.data$similarity, 
                                          na.rm = TRUE
                                          ),
                     change.hi      = max(rel.data$similarity, 
                                          na.rm = TRUE
                                          ),
                     change.vec     = NULL,
                     var.sq         = NULL,
                     app.fn         = val.vec,
                     tval           = 1.96,
                     n.out          = 1000,
                     ci             = TRUE
                     )

sim.df <- data.frame(change     = sim.vals$change,
                     pred       = sim.vals$pred,
                     lo         = sim.vals$lo,
                     hi         = sim.vals$hi
                     )

min.dist <- which(abs(sim.df$change - val.vec[17]) == min(abs(sim.df$change - val.vec[17])))

ggplot(data = sim.df,
       aes(x = change,
           y = pred
           )
       ) +
       geom_line(size   = 1.25,
                 colour = "darkblue"
                 ) +
       geom_ribbon(data = sim.df,
                   aes(ymin = lo,
                       ymax = hi
                       ),
                   fill   = "darkblue",
                   alpha  = 0.3,
                   colour = NA
                   ) +
       geom_point(aes(x      = val.vec[17],
                  y      = sim.df$pred[min.dist]),
                  colour = "red",
                  size   = 3.5
                  ) +
       # ylim(y.lim) +
       ggtitle("Iraq and Kuwait, 1988") +
       xlab("Portfolio Similarity") +
       ylab("Probability of Conflict") +
       theme_bw()
